!expr params$title

Code
library(tidyverse)
library(Seurat)
library(tidyseurat)
library(readxl)
library(janitor)
library(ggpubr)
library(BayesPrism)
Warning: replacing previous import 'gplots::lowess' by 'stats::lowess' when
loading 'BayesPrism'
Warning: replacing previous import 'BiocParallel::register' by 'NMF::register'
when loading 'BayesPrism'
Code
library(ggpubr)
library(DT)
library(here)

#dataset_name <- "hub_01"
#dataset_name <- "hub_02"
dataset_name <- params$dataset_name # specified in the yaml header
recalculate_1 <- FALSE # seurat object processing combined object
recalculate_2 <- FALSE # seurat object processing sub object

seurat_obj <- read_rds(here("intermediate_data",paste0(dataset_name,"_filtered_seurat_obj.rds"))
   )
subfilter_by <- "hIO_BIHi250-A"
Code
if (recalculate_1) {
  # basic seurat processing for each assay
###--------------------------------------------------------------------------------------------
for (assay_it in Assays(seurat_obj)) {
  DefaultAssay(seurat_obj) <- assay_it
  seurat_obj <- seurat_obj |> 
    NormalizeData() 
  
  seurat_obj <- seurat_obj |>
    FindVariableFeatures(assay = assay_it) |> 
    ScaleData() 
  print("runPCA")
  seurat_obj <- seurat_obj |>
    RunPCA(assay = assay_it,reduction.name = paste0("pca.",assay_it)) 
  print("neighbors")
  seurat_obj <- seurat_obj |> FindNeighbors(dims = 1:30,
                                                  reduction = paste0("pca.",assay_it),
                                                  assay = assay_it) 
  
  print("clusters")
  seurat_obj <- seurat_obj |>  FindClusters(
    resolution = 0.8,
    verbose = FALSE,
    cluster.name =paste0(assay_it, "_clusters_leiden_res0.8"),algorithm=4
    #graph.name = paste0("kNN_","pca.",assay_it)
  ) 
  
  seurat_obj <- seurat_obj |>
    RunUMAP(dims = 1:30, reduction = paste0("pca.",assay_it),
            assay = assay_it,
            reduction.name =paste0("umap.",assay_it) )
  
  gc()
  


  }
###-----------------------------------------------------------------------------------
  write_rds(seurat_obj, file = here(
    "intermediate_data", paste0(dataset_name,"_filtered_seurat_obj_processed.rds" )))
  
}
Code
seurat_obj <- read_rds(file =  here(
  "intermediate_data", paste0(dataset_name,"_filtered_seurat_obj_processed.rds" )))

DefaultAssay(seurat_obj) <- "RNA"
Code
seurat_obj |> DimPlot(label = T)

Code
seurat_obj |> DimPlot(group.by = "cell_line_doublet_cellranger", split.by ="cell_line_doublet_cellranger" )

Code
seurat_obj |> DimPlot(group.by = "cell_line_doublet_cellranger" )

Code
#markers_left <- seurat_obj |> FindMarkers(ident.1=c(5,6,7,8,10,15,4,14))

#markers_left |> filter(avg_log2FC<0) |> head()
#markers_left |> filter(avg_log2FC>0) |> head()


#FeaturePlot(seurat_obj, c("KRT8","SOX2-OT"))
Code
s.genes <- cc.genes$s.genes |> str_to_title()
g2m.genes <- cc.genes$g2m.genes|> str_to_title()

seurat_obj<- CellCycleScoring(seurat_obj, s.features = s.genes, g2m.features = g2m.genes)
Warning: The following features are not present in the object: Mcm5, Pcna,
Tyms, Fen1, Mcm2, Mcm4, Rrm1, Ung, Gins2, Mcm6, Cdca7, Dtl, Prim1, Uhrf1,
Mlf1ip, Hells, Rfc2, Rpa2, Nasp, Rad51ap1, Gmnn, Wdr76, Slbp, Ccne2, Ubr7,
Pold3, Msh2, Atad2, Rad51, Rrm2, Cdc45, Cdc6, Exo1, Tipin, Dscc1, Blm,
Casp8ap2, Usp1, Clspn, Pola1, Chaf1b, Brip1, E2f8, not searching for symbol
synonyms
Warning: The following features are not present in the object: Hmgb2, Cdk1,
Nusap1, Ube2c, Birc5, Tpx2, Top2a, Ndc80, Cks2, Nuf2, Cks1b, Mki67, Tmpo,
Cenpf, Tacc3, Fam64a, Smc4, Ccnb2, Ckap2l, Ckap2, Aurkb, Bub1, Kif11, Anp32e,
Tubb4b, Gtse1, Kif20b, Hjurp, Cdca3, Hn1, Cdc20, Ttk, Cdc25c, Kif2c, Rangap1,
Ncapd2, Dlgap5, Cdca2, Cdca8, Ect2, Kif23, Hmmr, Aurka, Psrc1, Anln, Lbr,
Ckap5, Cenpe, Ctcf, Nek2, G2e3, Gas2l3, Cbx5, Cenpa, not searching for symbol
synonyms
Warning in AddModuleScore(object = object, features = features, name = name, :
Could not find enough features in the object from the following feature lists:
S.Score Attempting to match case...Could not find enough features in the object
from the following feature lists: G2M.Score Attempting to match case...
Code
DimPlot(seurat_obj, group.by = "Phase")

Code
Idents(seurat_obj) <- "cell_line_cellranger"

markers_cell_line<- FindAllMarkers(seurat_obj,  group.by ="cell_line_cellranger" ,only.pos = T) |> as_tibble()

#markers_cell_line |> filter(cluster=="EC_BIHi005-A")

markers_cell_line |> mutate(cluster=as_factor(cluster)) |>
  relocate(cluster, gene) |> 
  DT::datatable(
    colnames = c("cluster", "gene","p_val ","avg_log2FC", "pct.1", "pct.2", "p_val_adj"),
    filter = 'top',
    options = list(dom = 'tp',
     paging = TRUE,
    #   lengthMenu = c(5, 10, 25),
    #   pageLength = 10, 
      scrollY = TRUE
     )
  ) |> formatSignif(columns=c('p_val', 'avg_log2FC', 'p_val_adj'), digits=4 )
Warning in instance$preRenderHook(instance): It seems your data is too big for
client-side DataTables. You may consider server-side processing:
https://rstudio.github.io/DT/server.html
Code
seurat_obj |> FeaturePlot("XIST")

Code
seurat_obj |> VlnPlot("XIST")

Code
Idents(seurat_obj) <- "RNA_clusters_leiden_res0.8"
clusters_7<- FindMarkers(seurat_obj, ident.1 = 7) |> as_tibble(rownames="gene")
clusters_9<- FindMarkers(seurat_obj, ident.1 = 9) |> as_tibble(rownames="gene")

cluster_9_markers <-  clusters_9 |> slice_min(p_val, n = 30) |> pull(gene)

paste(cluster_9_markers, collapse = ", ")
[1] "UBE2C, LINC02672, H1-1, CDK1, KIF23, SHISA2, ANLN, DLGAP5, ECT2, TUBB4B, AURKB, TOP2A, DEPDC1, ASPM, TTK, KIFC1, COL2A1, DIAPH3, APOBEC3B, TPX2, LYPD6B, HMGB2, NUSAP1, CDCA3, KIF11, CDC25C, CCNB1, MKI67, PRC1, NEK2"
Code
variable_features_vec <- VariableFeatures(seurat_obj) |> paste( collapse = ", ")
tibble(genes= variable_features_vec)|> write_csv(file = here(paste0("output\\variable_genes_",dataset_name,".csv")))

tibble(genes= VariableFeatures(seurat_obj) ) |> write_csv(file = here(paste0("output\\variable_genes_",dataset_name,".csv")))

yu_gramp_Cell_2021_tHIO <- read_excel(here("../../raw_data/cubi_cusco/genesets_from_literature/yu...gramp_Cell_2021_tHIO.xlsx"))

module score function

Code
add_named_module_score <- function(seurat_object,named_gene_list){

seurat_obj <- AddModuleScore(seurat_obj, features=named_gene_list, name = "m.s.")

names_vec <-paste0("m.s.", names(named_gene_list))
names(names_vec) <- paste0("m.s.", 1:length(names_vec))

names(seurat_obj@meta.data) <- str_replace_all(names(seurat_obj@meta.data),names_vec)

return(seurat_obj)
}

seurat_obj@meta.data <- seurat_obj@meta.data[,1:14]

  vector_list <- yu_gramp_Cell_2021_tHIO %>%
  group_by(group) %>%
  summarise(value_vector = list(feature)) %>%
  deframe()
vector_list <- clean_names(vector_list)
names(vector_list) <-    paste0("t_hio__",names(vector_list)  )

#named_gene_list <- vector_list

seurat_obj<- add_named_module_score(seurat_obj,vector_list)
Warning: The following features are not present in the object: PVRL4, C9orf16,
not searching for symbol synonyms
Warning: The following features are not present in the object: H3F3A, NGFRAP1,
H3F3B, AES, not searching for symbol synonyms
Warning: The following features are not present in the object: TMEM173, C9orf3,
H3F3B, not searching for symbol synonyms
Warning: The following features are not present in the object: FAM183A, CCDC19,
TCTEX1D2, C5orf49, RABL5, EFCAB1, C9orf9, C9orf116, CCDC176, C1orf192, not
searching for symbol synonyms
Warning: The following features are not present in the object: FAM132A, SEPP1,
SLC9A3R1, C19orf77, C19orf69, RP11-59E19.1, not searching for symbol synonyms
Warning: The following features are not present in the object: SEPP1, C19orf77,
not searching for symbol synonyms
Warning: The following features are not present in the object: ERO1LB,
KIAA1244, C9orf16, FAM213A, not searching for symbol synonyms
Warning: The following features are not present in the object: NGFRAP1, not
searching for symbol synonyms
Warning: The following features are not present in the object: TSTA3, SELM, not
searching for symbol synonyms
Warning: The following features are not present in the object: RP11-834C11.4,
not searching for symbol synonyms
Warning: The following features are not present in the object: H2AFZ, H2AFV,
NGFRAP1, KIAA0101, not searching for symbol synonyms
Warning: The following features are not present in the object: PTRF, SELM, not
searching for symbol synonyms
Warning: The following features are not present in the object: NOTCH2NL,
RP11-404P21.3, not searching for symbol synonyms
Warning: The following features are not present in the object: GNB2L1, not
searching for symbol synonyms
Warning: The following features are not present in the object: MLTK, NGFRAP1,
not searching for symbol synonyms
Code
# vector_list <- yu_gramp_Cell_2021_tHIO %>%
#   group_by(group) %>%
#   summarise(value_vector = list(feature)) %>%
#   deframe()
# 
# vector_list <- clean_names(vector_list)
# 
# seurat_obj <- AddModuleScore(seurat_obj, features=vector_list, name = "module_score")
# 
# names_vec <-paste0("module_score_", names(vector_list))
# names(names_vec) <- paste0("module_score", 1:length(names_vec))
# 
# names(seurat_obj@meta.data) <- str_replace_all(names(seurat_obj@meta.data),names_vec)
Code
f1 <- function(x){x+theme(legend.position = "none")}

p1 <- FeaturePlot(seurat_obj, names(seurat_obj@meta.data)[15:29], ) |> lapply(f1)

p4 <- VlnPlot(seurat_obj, names(seurat_obj@meta.data)[15:29], pt.size = 0) |> lapply(f1) 
#ggarrange(plotlist=p4,)
#ggarrange(plotlist=p1)
p4 |> patchwork::wrap_plots()

Code
mod_score_umap <- FeaturePlot(seurat_obj, names(seurat_obj@meta.data)[15:29], )|> lapply(f1) 
ggarrange(plotlist = mod_score_umap)

Code
seurat_subset <- seurat_obj |> filter(cell_line_cellranger==subfilter_by)
Code
if(recalculate_2){
  # basic seurat processing for each assay
###--------------------------------------------------------------------------------------------
for (assay_it in Assays(seurat_subset)) {
  print(assay_it)
  DefaultAssay(seurat_subset) <- assay_it
  seurat_subset <- seurat_subset |> 
    NormalizeData() 
  
  seurat_subset <- seurat_subset |>
    FindVariableFeatures(assay = assay_it) |> 
    ScaleData() 
  print("runPCA")
  seurat_subset <- seurat_subset |>
    RunPCA(assay = assay_it,reduction.name = paste0("pca.",assay_it)) 
  print("neighbors")
  seurat_subset <- seurat_subset |> FindNeighbors(dims = 1:20,
                                                  reduction = paste0("pca.",assay_it),
                                                  assay = assay_it) 
  
  print("clusters")
  seurat_subset <- seurat_subset |>  FindClusters(
    resolution = 0.8,
    verbose = FALSE,
    cluster.name =paste0(assay_it, "_clusters_leiden_res0.8"),algorithm=4
    #graph.name = paste0("kNN_","pca.",assay_it)
  ) 
  
  seurat_subset <- seurat_subset |>
    RunUMAP(dims = 1:10, reduction = paste0("pca.",assay_it),
            assay = assay_it,
            reduction.name =paste0("umap.",assay_it) )
  
  gc()
}
    write_rds(seurat_subset, file = paste0("intermediate_data/", ))
  file = here(
    "intermediate_data", paste0(dataset_name,"_",subfilter_by,"_filtered_seurat_obj_processed.rds" ))
}


###-----------------------------------------------------------------------------------
Code
seurat_subset <- read_rds(file = here(paste0("intermediate_data/",dataset_name,"_",subfilter_by,"_filtered_seurat_obj_processed.rds" )))

DefaultAssay(seurat_subset) <-"RNA" 

seurat_subset |> DimPlot(reduction ="umap.RNA" )

Code
seurat_subset |> DimPlot(reduction ="pca.RNA" ,dims = c(1,2))

Code
seurat_subset |> DimPlot(reduction ="pca.RNA" ,dims = c(3,4))

Code
seurat_subset |> DimPlot(reduction ="pca.RNA" ,dims = c(4,5))

Code
seurat_subset |> FeaturePlot( "nCount_RNA" )

Code
seurat_subset |> FeaturePlot( "percent_mito" )

Code
seurat_subset|> FeaturePlot( "nFeature_RNA" )

Code
seurat_subset|> FeaturePlot( "nFeature_RNA" )

Code
seurat_subset_markers <- FindAllMarkers(seurat_subset) |> as_tibble()
seurat_subset_markers |> group_by(cluster) |> slice_min(p_val, n=10)
# A tibble: 130 × 7
# Groups:   cluster [13]
       p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene    
       <dbl>      <dbl> <dbl> <dbl>     <dbl> <fct>   <chr>   
 1 1.52e-149       3.54 0.992 0.301 5.85e-145 1       IGFBP5  
 2 2.84e-123       3.48 0.806 0.166 1.09e-118 1       SERPINF1
 3 6.67e-114       3.03 0.849 0.207 2.58e-109 1       SMOC2   
 4 5.90e-107       2.98 0.845 0.197 2.28e-102 1       DLK1    
 5 1.05e- 94       2.25 0.901 0.282 4.06e- 90 1       LYPD6B  
 6 1.47e- 93       2.16 0.917 0.279 5.69e- 89 1       CLDN6   
 7 2.03e- 93       3.00 0.845 0.282 7.84e- 89 1       SEMA3C  
 8 8.00e- 93       2.31 0.897 0.294 3.09e- 88 1       WFDC2   
 9 6.19e- 92       3.44 0.611 0.106 2.39e- 87 1       SLC38A11
10 6.50e- 87       1.99 0.988 0.887 2.51e- 82 1       PLEKHA5 
# ℹ 120 more rows
Code
seurat_subset_markers |> filter(cluster==4, avg_log2FC>0) |> arrange(p_val) |> slice_min(p_val, n=30) |> pull(gene) |> 
  paste(collapse = ", ")
[1] "HES5, PTPRZ1, FRZB, HS3ST3A1, FAM181A, PAX3, FOXB1, NR2F1, PANTR1, LINC00461, DBX2, PALLD, BOC, FAT4, MEGF10, ID3, MIR99AHG, SOX2-OT, ILDR2, PAX6, NCALD, POU3F2, WNT5A, SOX3, VIM, ZEB2, LINC01414, LINC02306, LHFPL6, ID4"
Code
dings2 <- FeaturePlot(seurat_subset, names(seurat_subset@meta.data)[15:29], ) |> lapply(f1) 
ggarrange(plotlist = dings2)

Code
dings3 <- VlnPlot(seurat_subset, names(seurat_subset@meta.data)[15:29], pt.size = 0)|> lapply(f1) 
ggarrange(plotlist = dings3)